rm(list = ls())
## llamando a las librerias que (creemos que) vamos a usar
options("install.lock"=FALSE)
if (!require ("pacman")) install.packages ("pacman")

pacman::p_load (dplyr
                , haven
                , ggplot2
                , ggpubr
                , ggthemes  ## si quieren mas themes
                , readstata13
                , readxl
                , sf
                , tidyverse
                , tidyr
                , units
                , viridis ## paleta de colores Viridis
                , wesanderson## p/usar paleta de colores de Wes Anderson
                , stringr
                , RColorBrewer
                , patchwork
                , Rmisc
                , lfe
                , stargazer
                , AER
                , haven
                , skimr
                , modelsummary
                , terra
                , fixest
                , vtable
                , did
                , cowplot
                , grid)

setwd("/Users/mateovasquez/Dropbox/")

#####

base <- read.dta13("master_violence_landtitle_caracteristicas.dta")

#//3. Diff-in-Diff (Benchmark Specification) - Disaggregated 

#Outcome Variable
names(base)

base <- base %>% 
  dplyr::mutate(total_violence = rowSums(across(c(sumattacks_public, sumattacks_guerrilla,sumattacks_paramilitary)), na.rm = TRUE),
                health_inst = rowSums(across(c(health_post, health_center,hospital)), na.rm = TRUE))


#Renaming Variables 

base <- base %>% 
  dplyr::rename("indian_title_area_cs" = "area_resguardos_indigenas_cs",
                "indian_title_area" = "area_resguardos_indigenas",
                "afro_title_area_cs" = "area_titul_comunidades_negras_cs",
                "afro_title_area" = "area_titul_comunidades_negras")

#Treatment 	

base <- base %>% 
  dplyr::mutate(post = ifelse(Año>1995,1,0),
                any_titleXpost = any_title*post,
                any_title2 = any_title,
                post2 = post)


#the mean level of violence in neighboring municipalities (1988–1995)
Neighbors <- base %>% 
  dplyr::filter(Año<=1995) %>% 
  dplyr::group_by(codmpio) %>% 
  dplyr::summarise(spatlagguerrsum100_100_1995 = mean(spatlagguerrsum100_100, na.rm = T))

summary(Neighbors$spatlagguerrsum100_100_1995)

base <- left_join(base,Neighbors,
                  by = "codmpio")

#Controls X post
base <- base %>%
  dplyr::mutate(
    altura_post = altura*post, #ELEVATION
    rainfall_post = rainfall*post, #RAINFALL
    coca_0_1_post = coca_0_1*post, # COCA
    presence_mines1560_post = presence_mines1560*post, #GOLD MINES
    num_encomiendas1560_post = num_encomiendas1560*post, #ENCOMIENDAS
    royalroaddist_post = royalroaddist*post, #distance to royal roads
    distancia_mercado_post = distancia_mercado*post, #nearest market towns (km)
    pop95_post = pop95*post, #municipal population in 1995 
    pct_minority85_post = pct_minority85*post, #share of minority population in 1985 (%)
    conflictos_1901_1917_post = conflictos_1901_1917*post,#historical conflict using data on the inci- dence of land conflict between 1901 and 1931
    conflictos_1918_1931_post = conflictos_1918_1931*post,#historical conflict using data on the inci- dence of land conflict between 1901 and 1931
    Violencia_48_a_53_post = Violencia_48_a_53*post, #presence of armed combat during La Violencia (1948–1953)
    anucraids1971_78_post = anucraids1971_78*post, #frequency of land invasions between 1971 and 1978
    spatlagguerrsum100_100_1995_post = spatlagguerrsum100_100_1995*post, #the mean level of violence in neighboring municipalities (1988–1995)
    ltotal_plots_rfmd_1960_85_post = ltotal_plots_rfmd_1960_85*post, # total number of plots reformed between 1960 and 1985 to proxy for rural grievances.
    totviolencia85_post = totviolencia85*post) #ojo: La Violencia (1948 - 1958)

# Applying asinh transformation to each specified variable directly in mutate

base <- base %>%
  mutate(
    as_total_violence = asinh(total_violence),
    as_sumattacks_public = asinh(sumattacks_public),
    as_sumattacks_paramilitary = asinh(sumattacks_paramilitary),
    as_sumattacks_guerrilla = asinh(sumattacks_guerrilla))

# Running the regression with felm, including fixed effects and clustering

base <- base %>%
  mutate(Depto_year = paste(coddepto, Año, sep = "_"))

base <- base %>% 
  dplyr::group_by(codmpio,Año) %>% 
  dplyr::mutate(first_title = sum(landtitle == 1)) %>% 
  dplyr::ungroup() %>% 
  dplyr::group_by(codmpio) %>% 
  dplyr::mutate(Validation_year = ifelse(first_title == 1, Año,NA),
                first_title_year = min(Validation_year, na.rm = TRUE)) %>% 
  dplyr::ungroup()

base$first_title_year[base$first_title_year == Inf] <- 0

base <- base %>% 
  dplyr::mutate(time_to_event = Año - first_title_year,
                time_to_event = ifelse(first_title_year ==0 ,0,time_to_event))

#OTRA FORMA DE HACERLO
est_par = feols(as_sumattacks_paramilitary ~
                  any_titleXpost + 
                  any_title2 + 
                  post2 + 
                  altura_post + 
                  altura + 
                  rainfall_post + 
                  rainfall + 
                  coca_0_1_post + 
                  coca_0_1 + 
                  presence_mines1560_post + 
                  presence_mines1560 + 
                  num_encomiendas1560_post  + 
                  num_encomiendas1560 + 
                  royalroaddist_post + 
                  royalroaddist + 
                  distancia_mercado_post + 
                  distancia_mercado + 
                  pop95_post + 
                  pop95 + 
                  pct_minority85_post + 
                  pct_minority85 + 
                  conflictos_1901_1917_post +
                  conflictos_1901_1917 +
                  conflictos_1918_1931_post +
                  conflictos_1918_1931 +
                  anucraids1971_78_post + 
                  anucraids1971_78 + 
                  totviolencia85_post +
                  totviolencia85 +
                  spatlagguerrsum100_100_1995_post + 
                  spatlagguerrsum100_100_1995 + 
                  ltotal_plots_rfmd_1960_85_post +
                  ltotal_plots_rfmd_1960_85  + 
                  i(time_to_event, any_title2, ref = -1)| codmpio + Año, 
                cluster = "codmpio" ,
                data = base)

est_par.output <- cbind.data.frame(est_par$coefficients,
                                   SE = est_par$se) %>% 
  rownames_to_column() %>% 
  dplyr::filter(str_detect(rowname, fixed(":")))

est_par.output$rowname <-gsub( "time_to_event::", "", est_par.output$rowname )
est_par.output$rowname <-gsub( ":any_title2", "", est_par.output$rowname )

est_par.output <- est_par.output %>% 
  dplyr::rename("time" = "rowname",
                "Estimate" = "est_par$coefficients",
                "SE" = "SE") %>% 
  dplyr::mutate(time = as.numeric(time)) %>% 
  add_row(time = -1, Estimate = 0, SE = 0) %>% 
  dplyr::filter(time>=-9 & time<=9) %>% 
  dplyr::mutate(Estimate = Estimate*100,
                LB_CI = Estimate-(1.96*SE)*100,
                UB_CI = Estimate+(1.96*SE)*100) %>% 
  dplyr::rename("Estimate" = "Estimate")%>% 
  dplyr::select(-SE)

plot1_all <- ggplot(data = est_par.output, mapping = aes(y = Estimate, x = time)) +
  geom_point(size = 2) + 
  geom_line() +
  geom_line(aes(y = UB_CI), colour = "blue", linetype = "dashed") +
  geom_line(aes(y = LB_CI), colour = "blue", linetype = "dashed") +
  geom_hline(yintercept = 0, linetype="solid", color ="black") +
  geom_vline(xintercept = -1, linetype="solid", color ="black") +
  scale_x_continuous(limits=c(-10, 10), breaks = seq(-10, 10, by = 2)) + 
  scale_y_continuous(limits=c(-60, 140),breaks = seq(-60, 120, by = 20)) +
  ylab("Point Estimates") + 
  xlab("Years Until/Since Law 70 Land Title Assigned") +
  ggtitle("Paramilitar attacks") +
  theme_minimal()


est_pub = feols(as_sumattacks_public ~
                  any_titleXpost + 
                  any_title2 + 
                  post2 + 
                  altura_post + 
                  altura + 
                  rainfall_post + 
                  rainfall + 
                  coca_0_1_post + 
                  coca_0_1 + 
                  presence_mines1560_post + 
                  presence_mines1560 + 
                  num_encomiendas1560_post  + 
                  num_encomiendas1560 + 
                  royalroaddist_post + 
                  royalroaddist + 
                  distancia_mercado_post + 
                  distancia_mercado + 
                  pop95_post + 
                  pop95 + 
                  pct_minority85_post + 
                  pct_minority85 + 
                  conflictos_1901_1917_post +
                  conflictos_1901_1917 +
                  conflictos_1918_1931_post +
                  conflictos_1918_1931 +
                  anucraids1971_78_post + 
                  anucraids1971_78 + 
                  totviolencia85_post +
                  totviolencia85 +
                  spatlagguerrsum100_100_1995_post + 
                  spatlagguerrsum100_100_1995 + 
                  ltotal_plots_rfmd_1960_85_post +
                  ltotal_plots_rfmd_1960_85  + 
                  i(time_to_event, any_title2, ref = -1)| codmpio + Año, 
                cluster = "codmpio" ,
                data = base)

est_pub.output <- cbind.data.frame(est_pub$coefficients,
                                   SE = est_pub$se) %>% 
  rownames_to_column() %>% 
  dplyr::filter(str_detect(rowname, fixed(":")))

est_pub.output$rowname <-gsub( "time_to_event::", "", est_pub.output$rowname )
est_pub.output$rowname <-gsub( ":any_title2", "", est_pub.output$rowname )

est_pub.output <- est_pub.output %>% 
  dplyr::rename("time" = "rowname",
                "Estimate" = "est_pub$coefficients",
                "SE" = "SE") %>% 
  dplyr::mutate(time = as.numeric(time)) %>% 
  add_row(time = -1, Estimate = 0, SE = 0) %>% 
  dplyr::filter(time>=-9 & time<=9) %>% 
  dplyr::mutate(Estimate = Estimate*100,
                LB_CI = Estimate-(1.96*SE)*100,
                UB_CI = Estimate+(1.96*SE)*100) %>% 
  dplyr::rename("Estimate" = "Estimate")%>% 
  dplyr::select(-SE)

plot2_all <- ggplot(data = est_pub.output, mapping = aes(y = Estimate, x = time)) +
  geom_point(size = 2) + 
  geom_line() +
  geom_line(aes(y = UB_CI), colour = "blue", linetype = "dashed") +
  geom_line(aes(y = LB_CI), colour = "blue", linetype = "dashed") +
  geom_hline(yintercept = 0, linetype="solid", color ="black") +
  geom_vline(xintercept = -1, linetype="solid", color ="black") +
  scale_x_continuous(limits=c(-10, 10), breaks = seq(-10, 10, by = 2)) + 
  scale_y_continuous(limits=c(-60, 140),breaks = seq(-60, 120, by = 20)) +
  ylab("Point Estimates") + 
  xlab("Years Until/Since Law 70 Land Title Assigned") +
  ggtitle("Police & Army attacks attacks") +
  theme_minimal()

est_guer = feols(as_sumattacks_guerrilla ~
                   any_titleXpost + 
                   any_title2 + 
                   post2 + 
                   altura_post + 
                   altura + 
                   rainfall_post + 
                   rainfall + 
                   coca_0_1_post + 
                   coca_0_1 + 
                   presence_mines1560_post + 
                   presence_mines1560 + 
                   num_encomiendas1560_post  + 
                   num_encomiendas1560 + 
                   royalroaddist_post + 
                   royalroaddist + 
                   distancia_mercado_post + 
                   distancia_mercado + 
                   pop95_post + 
                   pop95 + 
                   pct_minority85_post + 
                   pct_minority85 + 
                   conflictos_1901_1917_post +
                   conflictos_1901_1917 +
                   conflictos_1918_1931_post +
                   conflictos_1918_1931 +
                   anucraids1971_78_post + 
                   anucraids1971_78 + 
                   totviolencia85_post +
                   totviolencia85 +
                   spatlagguerrsum100_100_1995_post + 
                   spatlagguerrsum100_100_1995 + 
                   ltotal_plots_rfmd_1960_85_post +
                   ltotal_plots_rfmd_1960_85  + 
                   i(time_to_event, any_title2, ref = -1)| codmpio + Año, 
                 cluster = "codmpio" ,
                 data = base)

est_guer.output <- cbind.data.frame(est_guer$coefficients,
                                    SE = est_guer$se) %>% 
  rownames_to_column() %>% 
  dplyr::filter(str_detect(rowname, fixed(":")))

est_guer.output$rowname <-gsub( "time_to_event::", "", est_guer.output$rowname )
est_guer.output$rowname <-gsub( ":any_title2", "", est_guer.output$rowname )

est_guer.output <- est_guer.output %>% 
  dplyr::rename("time" = "rowname",
                "Estimate" = "est_guer$coefficients",
                "SE" = "SE") %>% 
  dplyr::mutate(time = as.numeric(time)) %>% 
  add_row(time = -1, Estimate = 0, SE = 0) %>% 
  dplyr::filter(time>=-9 & time<=9) %>% 
  dplyr::mutate(Estimate = Estimate*100,
                LB_CI = Estimate-(1.96*SE)*100,
                UB_CI = Estimate+(1.96*SE)*100) %>% 
  dplyr::rename("Estimate" = "Estimate")%>% 
  dplyr::select(-SE)

plot3_all <- ggplot(data = est_guer.output, mapping = aes(y = Estimate, x = time)) +
  geom_point(size = 2) + 
  geom_line() +
  geom_line(aes(y = UB_CI), colour = "blue", linetype = "dashed") +
  geom_line(aes(y = LB_CI), colour = "blue", linetype = "dashed") +
  geom_hline(yintercept = 0, linetype="solid", color ="black") +
  geom_vline(xintercept = -1, linetype="solid", color ="black") +
  scale_x_continuous(limits=c(-10, 10), breaks = seq(-10, 10, by = 2)) + 
  scale_y_continuous(limits=c(-60, 140),breaks = seq(-60, 120, by = 20)) +
  ylab("Point Estimates") + 
  xlab("Years Until/Since Law 70 Land Title Assigned") +
  ggtitle("Guerrilla attacks") +
  theme_minimal()

###PACIFIC
est_par = feols(as_sumattacks_paramilitary ~
                  any_titleXpost + 
                  any_title2 + 
                  post2 + 
                  altura_post + 
                  altura + 
                  rainfall_post + 
                  rainfall + 
                  coca_0_1_post + 
                  coca_0_1 + 
                  presence_mines1560_post + 
                  presence_mines1560 + 
                  num_encomiendas1560_post  + 
                  num_encomiendas1560 + 
                  royalroaddist_post + 
                  royalroaddist + 
                  distancia_mercado_post + 
                  distancia_mercado + 
                  pop95_post + 
                  pop95 + 
                  pct_minority85_post + 
                  pct_minority85 + 
                  conflictos_1901_1917_post +
                  conflictos_1901_1917 +
                  conflictos_1918_1931_post +
                  conflictos_1918_1931 +
                  anucraids1971_78_post + 
                  anucraids1971_78 + 
                  totviolencia85_post +
                  totviolencia85 +
                  spatlagguerrsum100_100_1995_post + 
                  spatlagguerrsum100_100_1995 + 
                  ltotal_plots_rfmd_1960_85_post +
                  ltotal_plots_rfmd_1960_85  + 
                  i(time_to_event, any_title2, ref = -1)| codmpio + Año, 
                cluster = "codmpio" ,
                data = base %>% dplyr::filter(gpacifica == 1))

est_par.output <- cbind.data.frame(est_par$coefficients,
                                   SE = est_par$se) %>% 
  rownames_to_column() %>% 
  dplyr::filter(str_detect(rowname, fixed(":")))

est_par.output$rowname <-gsub( "time_to_event::", "", est_par.output$rowname )
est_par.output$rowname <-gsub( ":any_title2", "", est_par.output$rowname )

est_par.output <- est_par.output %>% 
  dplyr::rename("time" = "rowname",
                "Estimate" = "est_par$coefficients",
                "SE" = "SE") %>% 
  dplyr::mutate(time = as.numeric(time)) %>% 
  add_row(time = -1, Estimate = 0, SE = 0) %>% 
  dplyr::filter(time>=-9 & time<=9) %>% 
  dplyr::mutate(Estimate = Estimate*100,
                LB_CI = Estimate-(1.96*SE)*100,
                UB_CI = Estimate+(1.96*SE)*100) %>% 
  dplyr::rename("Estimate" = "Estimate")%>% 
  dplyr::select(-SE)

plot1 <- ggplot(data = est_par.output, mapping = aes(y = Estimate, x = time)) +
  geom_point(size = 2) + 
  geom_line() +
  geom_line(aes(y = UB_CI), colour = "blue", linetype = "dashed") +
  geom_line(aes(y = LB_CI), colour = "blue", linetype = "dashed") +
  geom_hline(yintercept = 0, linetype="solid", color ="black") +
  geom_vline(xintercept = -1, linetype="solid", color ="black") +
  scale_x_continuous(limits=c(-10, 10), breaks = seq(-10, 10, by = 2)) + 
  scale_y_continuous(limits=c(-60, 140),breaks = seq(-60, 120, by = 20)) +
  ylab("Point Estimates") + 
  xlab("Years Until/Since Law 70 Land Title Assigned") +
  ggtitle("Paramilitar attacks") +
  theme_minimal()


est_pub = feols(as_sumattacks_public ~
                  any_titleXpost + 
                  any_title2 + 
                  post2 + 
                  altura_post + 
                  altura + 
                  rainfall_post + 
                  rainfall + 
                  coca_0_1_post + 
                  coca_0_1 + 
                  presence_mines1560_post + 
                  presence_mines1560 + 
                  num_encomiendas1560_post  + 
                  num_encomiendas1560 + 
                  royalroaddist_post + 
                  royalroaddist + 
                  distancia_mercado_post + 
                  distancia_mercado + 
                  pop95_post + 
                  pop95 + 
                  pct_minority85_post + 
                  pct_minority85 + 
                  conflictos_1901_1917_post +
                  conflictos_1901_1917 +
                  conflictos_1918_1931_post +
                  conflictos_1918_1931 +
                  anucraids1971_78_post + 
                  anucraids1971_78 + 
                  totviolencia85_post +
                  totviolencia85 +
                  spatlagguerrsum100_100_1995_post + 
                  spatlagguerrsum100_100_1995 + 
                  ltotal_plots_rfmd_1960_85_post +
                  ltotal_plots_rfmd_1960_85  + 
                  i(time_to_event, any_title2, ref = -1)| codmpio + Año, 
                cluster = "codmpio" ,
                data = base %>% dplyr::filter(gpacifica == 1))

est_pub.output <- cbind.data.frame(est_pub$coefficients,
                                   SE = est_pub$se) %>% 
  rownames_to_column() %>% 
  dplyr::filter(str_detect(rowname, fixed(":")))

est_pub.output$rowname <-gsub( "time_to_event::", "", est_pub.output$rowname )
est_pub.output$rowname <-gsub( ":any_title2", "", est_pub.output$rowname )

est_pub.output <- est_pub.output %>% 
  dplyr::rename("time" = "rowname",
                "Estimate" = "est_pub$coefficients",
                "SE" = "SE") %>% 
  dplyr::mutate(time = as.numeric(time)) %>% 
  add_row(time = -1, Estimate = 0, SE = 0) %>% 
  dplyr::filter(time>=-9 & time<=9) %>% 
  dplyr::mutate(Estimate = Estimate*100,
                LB_CI = Estimate-(1.96*SE)*100,
                UB_CI = Estimate+(1.96*SE)*100) %>% 
  dplyr::rename("Estimate" = "Estimate")%>% 
  dplyr::select(-SE)

plot2 <- ggplot(data = est_pub.output, mapping = aes(y = Estimate, x = time)) +
  geom_point(size = 2) + 
  geom_line() +
  geom_line(aes(y = UB_CI), colour = "blue", linetype = "dashed") +
  geom_line(aes(y = LB_CI), colour = "blue", linetype = "dashed") +
  geom_hline(yintercept = 0, linetype="solid", color ="black") +
  geom_vline(xintercept = -1, linetype="solid", color ="black") +
  scale_x_continuous(limits=c(-10, 10), breaks = seq(-10, 10, by = 2)) + 
  scale_y_continuous(limits=c(-60, 140),breaks = seq(-60, 120, by = 20)) +
  ylab("Point Estimates") + 
  xlab("Years Until/Since Law 70 Land Title Assigned") +
  ggtitle("Police & Army attacks attacks") +
  theme_minimal()

est_guer = feols(as_sumattacks_guerrilla ~
                   any_titleXpost + 
                   any_title2 + 
                   post2 + 
                   altura_post + 
                   altura + 
                   rainfall_post + 
                   rainfall + 
                   coca_0_1_post + 
                   coca_0_1 + 
                   presence_mines1560_post + 
                   presence_mines1560 + 
                   num_encomiendas1560_post  + 
                   num_encomiendas1560 + 
                   royalroaddist_post + 
                   royalroaddist + 
                   distancia_mercado_post + 
                   distancia_mercado + 
                   pop95_post + 
                   pop95 + 
                   pct_minority85_post + 
                   pct_minority85 + 
                   conflictos_1901_1917_post +
                   conflictos_1901_1917 +
                   conflictos_1918_1931_post +
                   conflictos_1918_1931 +
                   anucraids1971_78_post + 
                   anucraids1971_78 + 
                   totviolencia85_post +
                   totviolencia85 +
                   spatlagguerrsum100_100_1995_post + 
                   spatlagguerrsum100_100_1995 + 
                   ltotal_plots_rfmd_1960_85_post +
                   ltotal_plots_rfmd_1960_85  + 
                   i(time_to_event, any_title2, ref = -1)| codmpio + Año, 
                 cluster = "codmpio" ,
                 data = base %>% dplyr::filter(gpacifica == 1))

est_guer.output <- cbind.data.frame(est_guer$coefficients,
                                    SE = est_guer$se) %>% 
  rownames_to_column() %>% 
  dplyr::filter(str_detect(rowname, fixed(":")))

est_guer.output$rowname <-gsub( "time_to_event::", "", est_guer.output$rowname )
est_guer.output$rowname <-gsub( ":any_title2", "", est_guer.output$rowname )

est_guer.output <- est_guer.output %>% 
  dplyr::rename("time" = "rowname",
                "Estimate" = "est_guer$coefficients",
                "SE" = "SE") %>% 
  dplyr::mutate(time = as.numeric(time)) %>% 
  add_row(time = -1, Estimate = 0, SE = 0) %>% 
  dplyr::filter(time>=-9 & time<=9) %>% 
  dplyr::mutate(Estimate = Estimate*100,
                LB_CI = Estimate-(1.96*SE)*100,
                UB_CI = Estimate+(1.96*SE)*100) %>% 
  dplyr::rename("Estimate" = "Estimate")%>% 
  dplyr::select(-SE)

plot3 <- ggplot(data = est_guer.output, mapping = aes(y = Estimate, x = time)) +
  geom_point(size = 2) + 
  geom_line() +
  geom_line(aes(y = UB_CI), colour = "blue", linetype = "dashed") +
  geom_line(aes(y = LB_CI), colour = "blue", linetype = "dashed") +
  geom_hline(yintercept = 0, linetype="solid", color ="black") +
  geom_vline(xintercept = -1, linetype="solid", color ="black") +
  scale_x_continuous(limits=c(-10, 10), breaks = seq(-10, 10, by = 2)) + 
  scale_y_continuous(limits=c(-60, 140),breaks = seq(-60, 120, by = 20)) +
  ylab("Point Estimates") + 
  xlab("Years Until/Since Law 70 Land Title Assigned") +
  ggtitle("Guerrilla attacks") +
  theme_minimal()

Main_plot <- ggarrange(plot1_all ,plot2_all, plot3_all, plot1, plot2, plot3, ncol = 3, nrow = 2)


annotate_figure(Main_plot, 
                top = text_grob("Full Sample with controls", 
                                           color = "black", face = "bold", size = 14),
                bottom = text_grob("Pacific Region Only with controls", 
                                color = "black", face = "bold", size = 14))

